% Start from a clean session: close every open figure window and wipe the workspace.
close all;
clear all;


%  =======================================================================================================================================================
% 
%  Code Description: 
%  This script builds the IFS-Base dataset from the raw data provided by the Bundesbank's Research Data and Service Centre (RDSC).
% 
%  =======================================================================================================================================================
% 
% 
%  General disclaimer:
%  This directory contains the replication code for "Connected Funds".
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted. 
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================


% Root folder of the project. All input and output locations below are built
% by appending to this string with strcat, so it must end with a backslash.
% (Terminated with a semicolon so the path is not echoed on every run.)
projectPath = 'C:\ConnectedFunds_Codebase\';

% Add functions folders
addpath(strcat(projectPath, 'Code\matlab_functions'));


%% read IFS files

% Build the column vector of reporting months in YYYYMM form, running from
% 200909 through 202006 inclusive. Every month is mapped to a running month
% counter (12*year + month-1), so the December -> January roll-over falls out
% of plain integer arithmetic instead of an explicit branch.
firstPeriod = 12*2009 + (9 - 1);
lastPeriod  = 12*2020 + (6 - 1);
periodIdx   = (firstPeriod:lastPeriod)';

umonth = 100*floor(periodIdx/12) + (mod(periodIdx, 12) + 1);

% leave no helper variables (nor any 'month'/'year' variables) in the workspace
clear firstPeriod lastPeriod periodIdx month year;

% loop over data files
%
% For every reporting month in umonth the raw parquet file is read and split
% into three tables, each saved to its own .mat file:
%   IFS_Holdings        - rows with SUBPOS == 'WP'   (key + security columns)
%   IFS_Holdings_NoISIN - rows with SUBPOS == 'WPoI' (key + security columns)
%   IFS_Hauptpos        - all remaining rows, with the security columns removed
% While looping, the unique values of ISIN (ufund), of SECCODE among the 'WP'
% rows (uwpkenn) and of SUBPOS among the remaining rows (usubpos) are collected.
ufund = [];
uwpkenn = [];
usubpos = [];

% Column sets, defined once so the three tables cannot drift apart.
% Files dated 201412 or later are handled with the ESA2010 column set,
% earlier files with the ESA1995 column set.
keyCols = {'DATE'; 'ISIN'; 'DATUM'; 'AMOUNT'};
secColsFrom201412 = {'ESA2010'; 'CNTRY'; 'CNTRYGRP'; 'CURRENCY'; 'CURGRP'; 'SECCODE'; 'SECISINDUM'; 'SECQUOT'; 'SECNOMUN'; 'SECPRICE'; 'SECREPO'; 'SECLEN'; 'SECTERM'; 'SECTERMBGN'; 'SECTERMEND'; 'SECMAT'};
secColsBefore201412 = {'ESA1995'; 'CNTRY'; 'CNTRYGRP'; 'CURRENCY'; 'CURGRP'; 'SECCODE'; 'SECQUOT'; 'SECNOMUN'; 'SECPRICE'; 'SECTERM'; 'SECMAT'};

for t = 1:length(umonth)
    % progress indicator
    disp(umonth(t));

    % load raw IFS data as provided by the RDSC
    rawFile = strcat(projectPath, 'Data\IFS\raw\10.12757_Bbk.IFSBase.09092006_', num2str(umonth(t)), '.parquet');
    IFS = parquetread(rawFile);
    % tag every row with the YYYYMM month of the file it came from
    IFS.DATUM = ones(height(IFS),1)*umonth(t);

    % pick the security column set that matches this file's vintage
    if umonth(t)>=201412
        secCols = secColsFrom201412;
    else
        secCols = secColsBefore201412;
    end

    % holdings with ISIN: extract the 'WP' rows, then remove them from IFS
    isWP = strcmp(IFS.SUBPOS,'WP');
    IFS_Holdings = IFS(isWP, [keyCols; secCols]);
    IFS(isWP,:) = [];

    % holdings w/o ISIN: extract the 'WPoI' rows, then remove them from IFS
    isWPoI = strcmp(IFS.SUBPOS,'WPoI');
    IFS_Holdings_NoISIN = IFS(isWPoI, [keyCols; secCols]);
    IFS(isWPoI,:) = [];

    % hauptpos: everything that is left, without the security columns
    IFS_Hauptpos = IFS;
    IFS_Hauptpos(:, secCols) = [];

    % aggregate cash holdings (BANKG) before saving results [one ISIN can report multiple BANKG positions in a given month]
    % Only the BANKG rows are grouped by ISIN, so the table is not re-scanned
    % once per fund. Within each ISIN the first BANKG row (in table order)
    % receives the summed AMOUNT and the later ones are flagged for removal.
    bankRows = find(strcmp(IFS_Hauptpos.SUBPOS,'BANKG'));
    dropRow = false(height(IFS_Hauptpos),1);
    if numel(bankRows) > 1
        [~,~,bankGroup] = unique(IFS_Hauptpos.ISIN(bankRows));
        groupSize = accumarray(bankGroup(:), 1);
        for g = find(groupSize > 1)'
            dupRows = bankRows(bankGroup == g);   % ascending row numbers
            IFS_Hauptpos.AMOUNT(dupRows(1)) = sum(IFS_Hauptpos.AMOUNT(dupRows));
            dropRow(dupRows(2:end)) = true;
        end
    end

    % drop duplicate entries
    IFS_Hauptpos(dropRow,:) = [];

    % one .mat file per table and month, suffixed with the YYYYMM month
    monthTag = num2str(umonth(t));
    save(strcat(projectPath, 'Data\IFS\mat\IFS_Hauptpos_', monthTag),'IFS_Hauptpos');
    save(strcat(projectPath, 'Data\IFS\mat\IFS_Holdings_', monthTag),'IFS_Holdings');
    save(strcat(projectPath, 'Data\IFS\mat\IFS_Holdings_NoISIN_', monthTag),'IFS_Holdings_NoISIN');

    % update the running lists of unique identifiers; the first month
    % initialises them so their type follows the table columns
    if t == 1
       % removing duplicate BANKG rows never removes an ISIN entirely, so the
       % unique ISINs are the same before and after the aggregation above
       ufund   = unique(IFS_Hauptpos.ISIN);
       uwpkenn = unique(IFS_Holdings.SECCODE);
       usubpos = unique(IFS_Hauptpos.SUBPOS);
    else
       ufund   = unique([ufund; IFS_Hauptpos.ISIN]);
       uwpkenn = unique([uwpkenn; IFS_Holdings.SECCODE]);
       usubpos = unique([usubpos; IFS_Hauptpos.SUBPOS]);
    end

    % release the per-month tables and temporaries before the next file
    clear IFS* rawFile secCols isWP isWPoI bankRows bankGroup groupSize g dupRows dropRow monthTag
end

% the column-set helpers are only needed inside the loop
clear keyCols secColsFrom201412 secColsBefore201412

% Persist the month list and the accumulated unique-value lists: one .mat
% file per variable, each file named after the variable it contains.
for outVar = {'umonth', 'ufund', 'uwpkenn', 'usubpos'}
    save(strcat(projectPath, 'Data\IFS\mat\', outVar{1}), outVar{1});
end
clear outVar;